💡 Research Context
This script analyzes targeted sequencing data from patients with hepatocellular carcinoma (HCC) who have been treated with transarterial chemoembolization (TACE) as part of my PhD thesis research project(Chapter 1️⃣).
👨🏻⚕️ Clinical Background
Hepatocellular carcinoma (HCC) is one of the most common malignancies worldwide, with increasing incidence in many countries. Transarterial chemoembolization (TACE) is a standard treatment for intermediate-stage HCC patients according to the Barcelona Clinic Liver Cancer (BCLC) staging system.
🎯 Study Aims
This analysis aims to investigate the genetic mutational landscape of HCC patients treated with TACE and to identify potential biomarkers for treatment response and survival outcomes.
🍏 Analytical Approach
We use maftools for mutation analysis and survival analysis techniques to evaluate associations between mutated genes and clinical outcomes including overall survival (OS) and progression-free survival (PFS).
# Set CRAN mirror to avoid errors
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Function to safely install and load packages
install_and_load <- function(packages, bioconductor = FALSE) {
for (package in packages) {
# Check if package is installed
if (!requireNamespace(package, quietly = TRUE)) {
message(paste("Installing package:", package))
# Install from appropriate source
if (bioconductor) {
if (!requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager")
}
BiocManager::install(package, update = FALSE, ask = FALSE)
} else {
install.packages(package, repos = "https://cloud.r-project.org")
}
}
# Load package
suppressWarnings(suppressPackageStartupMessages(
library(package, character.only = TRUE)
))
}
}
# Install and load remotes package first
install_and_load("remotes")
# Regular CRAN packages
cran_packages <- c("survival", "lubridate", "ggfortify", "flexsurv", "remotes",
"evaluate", "knitr", "dplyr", "ggplot2", "magrittr", "grid",
"broom.helpers", "data.table", "plotly", "DT", "flexdashboard",
"htmlwidgets", "visNetwork", "kableExtra", "RColorBrewer", "DT")
# Bioconductor packages
bioc_packages <- c("ggsurvfit", "gtsummary", "tidycmprsk", "ranger",
"survminer", "survcomp", "maftools")
# Install and load packages
install_and_load(cran_packages)
install_and_load(bioc_packages, bioconductor = TRUE)
# Create bibliography directory if it doesn't exist
if (!dir.exists("bib")) {
dir.create("bib")
}
# Generate package bibliography
all_packages <- c(.packages(), 'rmarkdown', 'knitr')
knitr::write_bib(x = all_packages, file = 'bib/packages.bib')
# Install maftools from GitHub if needed
if (!requireNamespace("maftools", quietly = TRUE)) {
tryCatch({
remotes::install_github("PoisonAlien/maftools", quiet = TRUE)
library(maftools)
}, error = function(e) {
message("Couldn't install maftools from GitHub, trying Bioconductor version instead")
})
}# Define file paths
maf_file_path <- "/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Tables/HCC_TACE_maftools.maf"
clinical_data_path <- "/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Tables/ClinicData_Maf_Final.tsv"
# Check if files exist
maf_exists <- file.exists(maf_file_path)
clinical_exists <- file.exists(clinical_data_path)
if (!maf_exists || !clinical_exists) {
warning("One or more files not found. Please check the paths.")
if (!maf_exists) warning(paste("MAF file not found:", maf_file_path))
if (!clinical_exists) warning(paste("Clinical data file not found:", clinical_data_path))
} else {
# Read MAF file with all mutation types
HCC_Final <- read.maf(
maf = maf_file_path,
clinicalData = clinical_data_path,
rmFlags = FALSE,
useAll = TRUE,
verbose = TRUE,
isTCGA = FALSE,
vc_nonSyn = c(
"Missense_Mutation", "5'UTR", "3'UTR", "5'Flank", "In_Frame_Del",
"Frame_Shift_Del", "Frame_Shift_Ins", "Splice_Site", "Nonsense_Mutation",
"Intron", "Silent"
)
)
# Read MAF file without intron/silent mutations
HCC_Final_non_intron <- read.maf(
maf = maf_file_path,
clinicalData = clinical_data_path,
rmFlags = FALSE,
useAll = TRUE,
verbose = TRUE,
isTCGA = FALSE,
vc_nonSyn = c(
"Missense_Mutation", "5'UTR", "3'UTR", "5'Flank", "In_Frame_Del",
"Frame_Shift_Del", "Frame_Shift_Ins", "Splice_Site", "Nonsense_Mutation"
)
)
# Read MAF file with default non-synonymous definitions
HCC_Final_non_synonymous <- read.maf(
maf = maf_file_path,
clinicalData = clinical_data_path,
rmFlags = FALSE,
useAll = TRUE,
verbose = TRUE,
isTCGA = FALSE,
vc_nonSyn = NULL
)
# Load clinical data
ClinicalData <- read.delim(clinical_data_path)
ClinicalData <- as.data.table(ClinicalData)
# Associate clinical data with MAF object
HCC_Final@clinical.data <- ClinicalData
HCC_Final_non_intron@clinical.data <- ClinicalData
HCC_Final_non_synonymous@clinical.data <- ClinicalData
# Print summary of loaded data
cat("Data successfully imported:\n")
cat(paste0("- Total samples: ", length(unique(HCC_Final@clinical.data$Tumor_Sample_Barcode)), "\n"))
cat(paste0("- Total patients: ", length(unique(HCC_Final@clinical.data$Pateint)), "\n"))
cat(paste0("- Total mutations (all types): ", nrow(HCC_Final@data), "\n"))
cat(paste0("- Total non-intron mutations: ", nrow(HCC_Final_non_intron@data), "\n"))
cat(paste0("- Total non-synonymous mutations: ", nrow(HCC_Final_non_synonymous@data), "\n"))
# List of HCC genes of interest
HCC_genes <- c(
"APC", "ARID1A", "ARID2", "ATM", "AXIN1", "CDKN2A", "CTNNB1", "HNF1A",
"JAK1", "KEAP1", "LZTR1", "PIK3CA", "PTEN", "RB1", "SF3B1",
"SMARCA4", "TERT", "TP53"
)
if (!require("remotes")) install.packages("remotes")
remotes::install_github("uham-bio/UHHformats", build_vignettes = TRUE)
# Define color palette for mutation types
fabcolors <- RColorBrewer::brewer.pal(n = 12, name = 'Set3')
names(fabcolors) <- c(
"Missense_Mutation", "5'UTR", "3'UTR", "5'Flank", "In_Frame_Del",
"Frame_Shift_Del", "Frame_Shift_Ins", "Splice_Site", "Intron",
"Nonsense_Mutation", "Silent"
)
fabcolors <- list(Variant_Classification = fabcolors)
}## -Reading
## -Validating
## -Silent variants: 9
## -Summarizing
## -Processing clinical data
## -Finished in 0.078s elapsed (0.064s cpu)
## -Reading
## -Validating
## -Silent variants: 155
## -Summarizing
## -Processing clinical data
## -Finished in 0.051s elapsed (0.047s cpu)
## -Reading
## -Validating
## -Silent variants: 245
## -Summarizing
## -Processing clinical data
## -Finished in 0.058s elapsed (0.055s cpu)
## Data successfully imported:
## - Total samples: 60
## - Total patients: 30
## - Total mutations (all types): 299
## - Total non-intron mutations: 153
## - Total non-synonymous mutations: 63
This code imports and processes mutation data from MAF (Mutation Annotation Format) files for hepatocellular carcinoma (HCC) with these enhancements:
The code creates three separate objects with different filtering criteria:
#==============================================================================
# FUNCTION DEFINITIONS
#==============================================================================
#' Create MAF subsets based on clinical queries
#'
#' @param maf MAF object to subset
#' @param subset_definitions Named list of query expressions
#' @return None (assigns objects to global environment)
create_named_subsets <- function(maf, subset_definitions) {
for (name in names(subset_definitions)) {
assign(name, subsetMaf(maf = maf, clinQuery = subset_definitions[[name]]), envir = .GlobalEnv)
}
}
#' Calculate comprehensive statistics for MAF subsets
#'
#' @param maf_objects List of MAF objects
#' @param reference_maf Reference MAF for percentage calculations
#' @param category_map Mapping of subset names to categories
#' @return Data frame with statistics for each subset
calculate_maf_stats <- function(maf_objects, reference_maf = NULL, category_map = NULL) {
# Initialize an empty list to store individual data frames
stats_list <- list()
# Get reference statistics if available
ref_samples <- NA
ref_mutations <- NA
if(!is.null(reference_maf) && inherits(reference_maf, "MAF")) {
ref_samples <- length(unique(reference_maf@clinical.data$Tumor_Sample_Barcode))
ref_mutations <- nrow(reference_maf@data)
}
# Process each subset
for(name in names(maf_objects)) {
tryCatch({
maf_obj <- maf_objects[[name]]
# Get category if provided
category <- "Other"
if(!is.null(category_map) && name %in% names(category_map)) {
category <- category_map[[name]]
}
# Calculate basic statistics
samples <- unique(maf_obj@clinical.data$Tumor_Sample_Barcode)
n_samples <- length(samples)
n_mutations <- nrow(maf_obj@data)
# Skip if no data
if(n_samples == 0 || n_mutations == 0) {
stats_list[[name]] <- tibble(
Category = category,
Subset = name,
Samples = n_samples,
Mutations = n_mutations,
MedianMutsPerSample = 0,
TopGene = "None",
TopGeneMutationCount = 0,
SamplePct = ifelse(!is.na(ref_samples), round(n_samples/ref_samples*100, 1), NA),
MutationPct = ifelse(!is.na(ref_mutations), round(n_mutations/ref_mutations*100, 1), NA)
)
next
}
# Calculate mutation metrics
muts_per_sample <- table(maf_obj@data$Tumor_Sample_Barcode)
median_muts <- median(as.numeric(muts_per_sample))
# Find top gene
gene_counts <- sort(table(maf_obj@data$Hugo_Symbol), decreasing = TRUE)
top_gene <- names(gene_counts)[1]
top_gene_count <- gene_counts[1]
# Calculate percentages
sample_pct <- ifelse(!is.na(ref_samples), round(n_samples/ref_samples*100, 1), NA)
mut_pct <- ifelse(!is.na(ref_mutations), round(n_mutations/ref_mutations*100, 1), NA)
# Create tibble for this subset (tibbles don't have row names)
stats_list[[name]] <- tibble(
Category = category,
Subset = name,
Samples = n_samples,
Mutations = n_mutations,
MedianMutsPerSample = median_muts,
TopGene = top_gene,
TopGeneMutationCount = top_gene_count,
SamplePct = sample_pct,
MutationPct = mut_pct
)
}, error = function(e) {
warning(paste("Error processing", name, ":", e$message))
})
}
# Combine using bind_rows from dplyr to avoid row names issues
if(length(stats_list) > 0) {
stats_df <- bind_rows(stats_list)
} else {
stats_df <- tibble(
Category = character(),
Subset = character(),
Samples = integer(),
Mutations = integer(),
MedianMutsPerSample = numeric(),
TopGene = character(),
TopGeneMutationCount = integer(),
SamplePct = numeric(),
MutationPct = numeric()
)
}
return(stats_df)
}
#==============================================================================
# SUBSET DEFINITIONS
#==============================================================================
# Define cohort categories
subset_categories <- list(
"Treatment" = c("pre_tace_samples", "post_tace_samples", "progression_samples"),
"Etiology" = c("MASLD_samples", "HCV_samples", "HBV_samples", "Alcohol_samples",
"ALLsample_Viralsamples", "ALLsample_NoViral", "Alcohol_sample_MASLD_samples",
"ALLsample_No_MASLD", "ALLsample_No_Alcohol", "ALLsample_No_HBC"),
"BCLC" = c("BCLC_A", "BCLC_B", "BCLC_C", "ALLBCLC_NoC", "ALLBCLC_NoA"),
"Survival" = c("Patient_OS_survival", "Patient_PFS_survival")
)
# Define subset query expressions
subset_definitions <- list(
# Treatment condition subsets
pre_tace_samples = "Condition1 == 'Pre_TACE'",
post_tace_samples = "Condition1 == 'Post_TACE'",
progression_samples = "Condition1 == 'progression'",
# Disease etiology subsets
MASLD_samples = "Aetiology == 'MASLD'",
HCV_samples = "Aetiology == 'HCV'",
HBV_samples = "Aetiology == 'HBV'",
Alcohol_samples = "Aetiology == 'Alcohol'",
ALLsample_Viralsamples = "Aetiology %in% c('HCV','HBV')",
ALLsample_NoViral = "Aetiology %in% c('Alcohol','MASLD','Other')",
Alcohol_sample_MASLD_samples = "Aetiology %in% c('Alcohol','MASLD')",
ALLsample_No_MASLD = "Aetiology %in% c('Alcohol','HCV','HBV','Other')",
ALLsample_No_Alcohol = "Aetiology %in% c('MASLD','HCV','HBV','Other')",
ALLsample_No_HBC = "Aetiology %in% c('MASLD','HCV','Alcohol','Other')",
# BCLC stage subsets
BCLC_A = "BCLC %in% c('A')",
BCLC_B = "BCLC %in% c('B')",
BCLC_C = "BCLC %in% c('C')",
ALLBCLC_NoC = "BCLC %in% c('A','B')",
ALLBCLC_NoA = "BCLC %in% c('B','C')",
# Survival status subsets
Patient_OS_survival = "OS_Status == '1'",
Patient_PFS_survival = "PFS_Status == '1'"
)
#==============================================================================
# CREATE COHORT SUBSETS
#==============================================================================
# Create all subsets at once
cat("### Creating MAF subsets\n")## ### Creating MAF subsets
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## ✓ Created 20 MAF subsets
# Create category mapping
category_map <- list()
for(cat in names(subset_categories)) {
for(subset in subset_categories[[cat]]) {
category_map[[subset]] <- cat
}
}
# Get all MAF objects
maf_objects <- list()
for(subset_name in unlist(subset_categories)) {
if(exists(subset_name) && inherits(get(subset_name), "MAF")) {
maf_objects[[subset_name]] <- get(subset_name)
}
}
#==============================================================================
# CALCULATE STATISTICS
#==============================================================================
cat("### Calculating statistics\n")## ### Calculating statistics
subset_stats <- calculate_maf_stats(maf_objects, reference_maf = HCC_Final, category_map = category_map)
# Ensure it's a tibble to avoid row name issues
subset_stats <- as_tibble(subset_stats)
cat("✓ Calculated statistics for", nrow(subset_stats), "subsets\n\n")## ✓ Calculated statistics for 20 subsets
#==============================================================================
# GENERATE SUMMARY REPORT
#==============================================================================
# Overall dataset summary
cat("# Hepatocellular Carcinoma Cohort Statistics\n\n")## # Hepatocellular Carcinoma Cohort Statistics
# Reference dataset information
ref_samples <- length(unique(HCC_Final@clinical.data$Tumor_Sample_Barcode))
ref_mutations <- nrow(HCC_Final@data)
ref_median_muts <- median(table(HCC_Final@data$Tumor_Sample_Barcode))
cat("<div style='background-color: #f8f9fa; padding: 15px; border-radius: 5px; margin-bottom: 20px;'>")## <div style='background-color: #f8f9fa; padding: 15px; border-radius: 5px; margin-bottom: 20px;'>
## <strong>Reference Dataset Summary:</strong><br>
## 60 samples with 299 mutations<br>
## Median mutations per sample: 3.5 <br>
## </div>
#==============================================================================
# CATEGORY-SPECIFIC STATISTICS
#==============================================================================
# Create tables and visualizations by category
for(cat in unique(subset_stats$Category)) {
cat(paste0("## ", cat, " Cohorts\n\n"))
# Filter statistics for this category
cat_stats_filtered <- subset_stats %>%
filter(Category == cat) %>%
arrange(desc(Samples))
# Create a completely fresh tibble without any possibility of row names
cat_stats <- tibble(
Subset = cat_stats_filtered$Subset,
Samples = cat_stats_filtered$Samples,
Mutations = cat_stats_filtered$Mutations,
MedianMutsPerSample = cat_stats_filtered$MedianMutsPerSample,
TopGene = cat_stats_filtered$TopGene,
TopGeneMutationCount = cat_stats_filtered$TopGeneMutationCount,
SamplePct = cat_stats_filtered$SamplePct,
MutationPct = cat_stats_filtered$MutationPct
)
cat("### Summary Table\n")
# Add row.names=FALSE parameter here
knitr::kable(cat_stats,
caption = paste(cat, "Cohort Statistics"),
format = "html",
row.names = FALSE) %>% # Explicitly set row.names=FALSE
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE) %>%
column_spec(1, bold = TRUE) %>%
print()
cat("\n### Visualizations\n")
# Sample count visualization
p1 <- ggplot(cat_stats, aes(x = reorder(Subset, -Samples), y = Samples, fill = Subset)) +
geom_bar(stat = "identity") +
geom_text(aes(label = Samples), vjust = -0.5, size = 3.5) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none") +
labs(title = paste("Sample Counts for", cat, "Cohorts"),
x = NULL, y = "Number of Samples") +
scale_fill_brewer(palette = "Set2")
# Mutation rate visualization
p2 <- ggplot(cat_stats, aes(x = reorder(Subset, -MedianMutsPerSample),
y = MedianMutsPerSample, fill = Subset)) +
geom_bar(stat = "identity") +
geom_text(aes(label = MedianMutsPerSample), vjust = -0.5, size = 3.5) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none") +
labs(title = paste("Mutation Rate for", cat, "Cohorts"),
x = NULL, y = "Median Mutations per Sample") +
scale_fill_brewer(palette = "Set2")
# Display plots
print(p1)
print(p2)
cat("\n\n")
}## ## Treatment Cohorts
##
## ### Summary Table
## <table class="table table-striped table-hover table-condensed" style="color: black; width: auto !important; margin-left: auto; margin-right: auto;">
## <caption>Treatment Cohort Statistics</caption>
## <thead>
## <tr>
## <th style="text-align:left;"> Subset </th>
## <th style="text-align:right;"> Samples </th>
## <th style="text-align:right;"> Mutations </th>
## <th style="text-align:right;"> MedianMutsPerSample </th>
## <th style="text-align:left;"> TopGene </th>
## <th style="text-align:right;"> TopGeneMutationCount </th>
## <th style="text-align:right;"> SamplePct </th>
## <th style="text-align:right;"> MutationPct </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;font-weight: bold;"> post_tace_samples </td>
## <td style="text-align:right;"> 26 </td>
## <td style="text-align:right;"> 128 </td>
## <td style="text-align:right;"> 4 </td>
## <td style="text-align:left;"> ARID1A </td>
## <td style="text-align:right;"> 14 </td>
## <td style="text-align:right;"> 43.3 </td>
## <td style="text-align:right;"> 42.8 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> pre_tace_samples </td>
## <td style="text-align:right;"> 23 </td>
## <td style="text-align:right;"> 110 </td>
## <td style="text-align:right;"> 3 </td>
## <td style="text-align:left;"> ARID1A </td>
## <td style="text-align:right;"> 15 </td>
## <td style="text-align:right;"> 38.3 </td>
## <td style="text-align:right;"> 36.8 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> progression_samples </td>
## <td style="text-align:right;"> 11 </td>
## <td style="text-align:right;"> 61 </td>
## <td style="text-align:right;"> 3 </td>
## <td style="text-align:left;"> CTNNB1 </td>
## <td style="text-align:right;"> 16 </td>
## <td style="text-align:right;"> 18.3 </td>
## <td style="text-align:right;"> 20.4 </td>
## </tr>
## </tbody>
## </table>
## ### Visualizations
##
##
## ## Etiology Cohorts
##
## ### Summary Table
## <table class="table table-striped table-hover table-condensed" style="color: black; width: auto !important; margin-left: auto; margin-right: auto;">
## <caption>Etiology Cohort Statistics</caption>
## <thead>
## <tr>
## <th style="text-align:left;"> Subset </th>
## <th style="text-align:right;"> Samples </th>
## <th style="text-align:right;"> Mutations </th>
## <th style="text-align:right;"> MedianMutsPerSample </th>
## <th style="text-align:left;"> TopGene </th>
## <th style="text-align:right;"> TopGeneMutationCount </th>
## <th style="text-align:right;"> SamplePct </th>
## <th style="text-align:right;"> MutationPct </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;font-weight: bold;"> ALLsample_No_HBC </td>
## <td style="text-align:right;"> 58 </td>
## <td style="text-align:right;"> 284 </td>
## <td style="text-align:right;"> 3.5 </td>
## <td style="text-align:left;"> CTNNB1 </td>
## <td style="text-align:right;"> 34 </td>
## <td style="text-align:right;"> 96.7 </td>
## <td style="text-align:right;"> 95.0 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> ALLsample_No_Alcohol </td>
## <td style="text-align:right;"> 44 </td>
## <td style="text-align:right;"> 198 </td>
## <td style="text-align:right;"> 3.0 </td>
## <td style="text-align:left;"> ARID1A </td>
## <td style="text-align:right;"> 25 </td>
## <td style="text-align:right;"> 73.3 </td>
## <td style="text-align:right;"> 66.2 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> ALLsample_No_MASLD </td>
## <td style="text-align:right;"> 42 </td>
## <td style="text-align:right;"> 225 </td>
## <td style="text-align:right;"> 3.5 </td>
## <td style="text-align:left;"> CTNNB1 </td>
## <td style="text-align:right;"> 31 </td>
## <td style="text-align:right;"> 70.0 </td>
## <td style="text-align:right;"> 75.3 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> ALLsample_NoViral </td>
## <td style="text-align:right;"> 38 </td>
## <td style="text-align:right;"> 197 </td>
## <td style="text-align:right;"> 4.0 </td>
## <td style="text-align:left;"> CTNNB1 </td>
## <td style="text-align:right;"> 31 </td>
## <td style="text-align:right;"> 63.3 </td>
## <td style="text-align:right;"> 65.9 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Alcohol_sample_MASLD_samples </td>
## <td style="text-align:right;"> 34 </td>
## <td style="text-align:right;"> 175 </td>
## <td style="text-align:right;"> 4.5 </td>
## <td style="text-align:left;"> CTNNB1 </td>
## <td style="text-align:right;"> 30 </td>
## <td style="text-align:right;"> 56.7 </td>
## <td style="text-align:right;"> 58.5 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> ALLsample_Viralsamples </td>
## <td style="text-align:right;"> 22 </td>
## <td style="text-align:right;"> 102 </td>
## <td style="text-align:right;"> 3.0 </td>
## <td style="text-align:left;"> ARID1A </td>
## <td style="text-align:right;"> 11 </td>
## <td style="text-align:right;"> 36.7 </td>
## <td style="text-align:right;"> 34.1 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> HCV_samples </td>
## <td style="text-align:right;"> 20 </td>
## <td style="text-align:right;"> 87 </td>
## <td style="text-align:right;"> 3.0 </td>
## <td style="text-align:left;"> ARID1A </td>
## <td style="text-align:right;"> 11 </td>
## <td style="text-align:right;"> 33.3 </td>
## <td style="text-align:right;"> 29.1 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> MASLD_samples </td>
## <td style="text-align:right;"> 18 </td>
## <td style="text-align:right;"> 74 </td>
## <td style="text-align:right;"> 3.5 </td>
## <td style="text-align:left;"> ARID1A </td>
## <td style="text-align:right;"> 13 </td>
## <td style="text-align:right;"> 30.0 </td>
## <td style="text-align:right;"> 24.7 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Alcohol_samples </td>
## <td style="text-align:right;"> 16 </td>
## <td style="text-align:right;"> 101 </td>
## <td style="text-align:right;"> 5.0 </td>
## <td style="text-align:left;"> CTNNB1 </td>
## <td style="text-align:right;"> 25 </td>
## <td style="text-align:right;"> 26.7 </td>
## <td style="text-align:right;"> 33.8 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> HBV_samples </td>
## <td style="text-align:right;"> 2 </td>
## <td style="text-align:right;"> 15 </td>
## <td style="text-align:right;"> 7.5 </td>
## <td style="text-align:left;"> AXIN1 </td>
## <td style="text-align:right;"> 5 </td>
## <td style="text-align:right;"> 3.3 </td>
## <td style="text-align:right;"> 5.0 </td>
## </tr>
## </tbody>
## </table>
## ### Visualizations
##
##
## ## BCLC Cohorts
##
## ### Summary Table
## <table class="table table-striped table-hover table-condensed" style="color: black; width: auto !important; margin-left: auto; margin-right: auto;">
## <caption>BCLC Cohort Statistics</caption>
## <thead>
## <tr>
## <th style="text-align:left;"> Subset </th>
## <th style="text-align:right;"> Samples </th>
## <th style="text-align:right;"> Mutations </th>
## <th style="text-align:right;"> MedianMutsPerSample </th>
## <th style="text-align:left;"> TopGene </th>
## <th style="text-align:right;"> TopGeneMutationCount </th>
## <th style="text-align:right;"> SamplePct </th>
## <th style="text-align:right;"> MutationPct </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;font-weight: bold;"> ALLBCLC_NoC </td>
## <td style="text-align:right;"> 49 </td>
## <td style="text-align:right;"> 218 </td>
## <td style="text-align:right;"> 3 </td>
## <td style="text-align:left;"> ARID1A </td>
## <td style="text-align:right;"> 26 </td>
## <td style="text-align:right;"> 81.7 </td>
## <td style="text-align:right;"> 72.9 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> ALLBCLC_NoA </td>
## <td style="text-align:right;"> 41 </td>
## <td style="text-align:right;"> 186 </td>
## <td style="text-align:right;"> 3 </td>
## <td style="text-align:left;"> CTNNB1 </td>
## <td style="text-align:right;"> 29 </td>
## <td style="text-align:right;"> 68.3 </td>
## <td style="text-align:right;"> 62.2 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> BCLC_B </td>
## <td style="text-align:right;"> 30 </td>
## <td style="text-align:right;"> 105 </td>
## <td style="text-align:right;"> 3 </td>
## <td style="text-align:left;"> ARID1A </td>
## <td style="text-align:right;"> 14 </td>
## <td style="text-align:right;"> 50.0 </td>
## <td style="text-align:right;"> 35.1 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> BCLC_A </td>
## <td style="text-align:right;"> 19 </td>
## <td style="text-align:right;"> 113 </td>
## <td style="text-align:right;"> 5 </td>
## <td style="text-align:left;"> SF3B1 </td>
## <td style="text-align:right;"> 15 </td>
## <td style="text-align:right;"> 31.7 </td>
## <td style="text-align:right;"> 37.8 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> BCLC_C </td>
## <td style="text-align:right;"> 11 </td>
## <td style="text-align:right;"> 81 </td>
## <td style="text-align:right;"> 6 </td>
## <td style="text-align:left;"> CTNNB1 </td>
## <td style="text-align:right;"> 20 </td>
## <td style="text-align:right;"> 18.3 </td>
## <td style="text-align:right;"> 27.1 </td>
## </tr>
## </tbody>
## </table>
## ### Visualizations
##
##
## ## Survival Cohorts
##
## ### Summary Table
## <table class="table table-striped table-hover table-condensed" style="color: black; width: auto !important; margin-left: auto; margin-right: auto;">
## <caption>Survival Cohort Statistics</caption>
## <thead>
## <tr>
## <th style="text-align:left;"> Subset </th>
## <th style="text-align:right;"> Samples </th>
## <th style="text-align:right;"> Mutations </th>
## <th style="text-align:right;"> MedianMutsPerSample </th>
## <th style="text-align:left;"> TopGene </th>
## <th style="text-align:right;"> TopGeneMutationCount </th>
## <th style="text-align:right;"> SamplePct </th>
## <th style="text-align:right;"> MutationPct </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Patient_PFS_survival </td>
## <td style="text-align:right;"> 41 </td>
## <td style="text-align:right;"> 221 </td>
## <td style="text-align:right;"> 4 </td>
## <td style="text-align:left;"> CTNNB1 </td>
## <td style="text-align:right;"> 31 </td>
## <td style="text-align:right;"> 68.3 </td>
## <td style="text-align:right;"> 73.9 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Patient_OS_survival </td>
## <td style="text-align:right;"> 37 </td>
## <td style="text-align:right;"> 209 </td>
## <td style="text-align:right;"> 5 </td>
## <td style="text-align:left;"> CTNNB1 </td>
## <td style="text-align:right;"> 31 </td>
## <td style="text-align:right;"> 61.7 </td>
## <td style="text-align:right;"> 69.9 </td>
## </tr>
## </tbody>
## </table>
## ### Visualizations
#==============================================================================
# CROSS-CATEGORY COMPARISONS
#==============================================================================
cat("## Cross-Category Comparisons\n\n")## ## Cross-Category Comparisons
# Sample size comparison across categories
# Use proper tibble-compatible approach for factor reordering
subset_stats_ordered <- subset_stats %>%
mutate(Subset = factor(Subset, levels = Subset[order(Samples, decreasing = TRUE)]))
# Overall sample size comparison
cat("### Overall Sample Count Comparison\n")## ### Overall Sample Count Comparison
overall_plot <- ggplot(subset_stats_ordered, aes(x = Subset, y = Samples, fill = Category)) +
geom_bar(stat = "identity") +
geom_text(aes(label = Samples), vjust = -0.3, size = 3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Cohort Sizes Across All Categories",
x = NULL, y = "Number of Samples") +
scale_fill_brewer(palette = "Set2")
print(overall_plot)##
## ### Overall Mutation Rate Comparison
mutation_plot <- ggplot(subset_stats, aes(x = reorder(Subset, -MedianMutsPerSample),
y = MedianMutsPerSample, fill = Category)) +
geom_bar(stat = "identity") +
geom_text(aes(label = MedianMutsPerSample), vjust = -0.3, size = 3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Mutation Rates Across All Categories",
x = NULL, y = "Median Mutations per Sample") +
scale_fill_brewer(palette = "Set2")
print(mutation_plot)#==============================================================================
# INTERACTIVE DATA EXPLORATION
#==============================================================================
cat("\n## Interactive Data Table\n\n")##
## ## Interactive Data Table
if(requireNamespace("DT", quietly = TRUE)) {
# Create a completely new tibble from scratch to avoid inheriting row names
display_stats <- tibble(
Category = subset_stats$Category,
Subset = subset_stats$Subset,
Samples = subset_stats$Samples,
Mutations = subset_stats$Mutations,
MedianMutsPerSample = subset_stats$MedianMutsPerSample,
TopGene = subset_stats$TopGene,
TopGeneMutationCount = subset_stats$TopGeneMutationCount,
SamplePct = subset_stats$SamplePct,
MutationPct = subset_stats$MutationPct
)
DT::datatable(display_stats,
caption = "Comprehensive Cohort Statistics",
filter = "top",
options = list(
pageLength = 20,
dom = 'Blfrtip',
buttons = c('copy', 'csv'),
lengthMenu = list(c(10, 20, -1), c('10', '20', 'All'))
),
rownames = FALSE) %>%
DT::formatStyle(
'Samples',
background = DT::styleColorBar(c(0, max(display_stats$Samples)), 'steelblue'),
backgroundSize = '98% 88%',
backgroundRepeat = 'no-repeat',
backgroundPosition = 'center'
) %>%
DT::formatStyle(
'Mutations',
background = DT::styleColorBar(c(0, max(display_stats$Mutations)), 'forestgreen'),
backgroundSize = '98% 88%',
backgroundRepeat = 'no-repeat',
backgroundPosition = 'center'
) %>%
DT::formatRound(columns = c('MedianMutsPerSample', 'SamplePct', 'MutationPct'), digits = 1)
} else {
cat("*Note: Install the DT package for interactive data tables*")
}# Create a dashboard with key metrics
valueBox_html <- function(value, caption, icon, color) {
paste0(
'<div class="section-box" style="background-color:',
switch(color,
"primary" = "#e8f0f9",
"info" = "#e3f2fd",
"success" = "#e8f8ef",
"warning" = "#fff8e8",
"#e8f0f9"),
'; padding: 15px; border-radius: 5px; margin: 10px; flex: 1; min-width: 250px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);">',
'<div style="display:flex; align-items:center;">',
'<div style="font-size:2.5em; margin-right:15px; color:',
switch(color,
"primary" = "#3498db",
"info" = "#0D47A1",
"success" = "#27ae60",
"warning" = "#f39c12",
"#3498db"),
';">',
'<i class="fa ', icon, '"></i>',
'</div>',
'<div>',
'<div style="font-size:1.8em; font-weight:bold;">', value, '</div>',
'<div style="font-size:1.2em; color:#7f8c8d;">', caption, '</div>',
'</div>',
'</div>',
'</div>'
)
}
if (exists("HCC_Final") && exists("HCC_Final_non_intron") && exists("HCC_Final_non_synonymous")) {
# Number of samples
num_samples <- length(unique(HCC_Final@clinical.data$Tumor_Sample_Barcode))
# Number of mutations
num_mutations <- nrow(HCC_Final@data)
# Median mutations per sample
median_muts <- median(HCC_Final@variants.per.sample$Variants)
# Number of samples non-intron
num_samplesnon_intron <- length(unique(HCC_Final_non_intron@clinical.data$Tumor_Sample_Barcode))
# Number of mutations non-intron
num_mutationsnon_intron <- nrow(HCC_Final_non_intron@data)
# Median mutations per sample non-intron
median_mutsnon_intron <- median(HCC_Final_non_intron@variants.per.sample$Variants)
# Number of samples non-synonymous
num_samplesnon_synonymous <- length(unique(HCC_Final_non_synonymous@clinical.data$Tumor_Sample_Barcode))
# Number of mutations non-synonymous
num_mutationsnon_synonymous <- nrow(HCC_Final_non_synonymous@data)
# Median mutations per sample non-synonymous
median_mutsnon_synonymous <- median(HCC_Final_non_synonymous@variants.per.sample$Variants)
} else {
cat("Data not properly loaded. Please check the data import step.")
}All Mutations
Non-Intron Mutations
Non-Synonymous Mutations
if (requireNamespace("gtsummary", quietly = TRUE) && requireNamespace("DT", quietly = TRUE) && exists("HCC_Final")) {
library(gtsummary)
library(dplyr)
library(DT)
# Get all unique patients from the first Pre_TACE sample for each patient
unique_patients <- HCC_Final@clinical.data %>%
as.data.frame() %>%
filter(Condition1 == "Pre_TACE") %>%
distinct(Pateint, .keep_all = TRUE)
# If not all patients have Pre_TACE samples, add their first available sample
if (nrow(unique_patients) < length(unique(HCC_Final@clinical.data$Pateint))) {
missing_patients <- setdiff(unique(HCC_Final@clinical.data$Pateint), unique_patients$Pateint)
for (patient in missing_patients) {
patient_samples <- HCC_Final@clinical.data[HCC_Final@clinical.data$Pateint == patient, ]
unique_patients <- rbind(unique_patients, patient_samples[1, ])
}
}
# Select relevant columns for summary
clin_cols <- c("Age", "Gender", "Aetiology", "Viral_Not_Viral", "Cirrhosis",
"CTP", "BCLC", "Tumour_Size", "Response", "OS_time",
"OS_Status", "PFS_time", "PFS_Status", "PS", "HAP", "PVT")
# Create summary table for available columns
available_cols <- intersect(clin_cols, colnames(unique_patients))
if (length(available_cols) > 0) {
# Create categorization for variables
var_labels <- list(
Age = "Age (years)",
Gender = "Sex",
Aetiology = "Etiology",
Viral_Not_Viral = "Viral Status",
Cirrhosis = "Cirrhosis",
CTP = "Child-Turcotte-Pugh Class",
BCLC = "Barcelona Clinic Liver Cancer Stage",
Tumour_Size = "Tumor Size (cm)",
Response = "Treatment Response",
OS_time = "Overall Survival (months)",
OS_Status = "Overall Survival Status",
PFS_time = "Progression-Free Survival (months)",
PFS_Status = "Progression Status",
PS = "Performance Status",
HAP = "Hepatoma Arterial-embolization Prognostic Score",
PVT = "Portal Vein Thrombosis"
)
# Subset to available variables
var_labels <- var_labels[names(var_labels) %in% available_cols]
# Create summary statistics in an organized way
tbl_summary_data <- unique_patients %>%
select(all_of(available_cols)) %>%
tbl_summary(
by = NULL,
label = var_labels,
statistic = list(
all_continuous() ~ "{median} ({p25}, {p75})",
all_categorical() ~ "{n} ({p}%)"
),
digits = all_continuous() ~ 1,
missing = "no"
) %>%
modify_header(label = "**Characteristic**", stat_0 = "**Value (N={N})**") %>%
bold_labels() %>%
as_gt() %>%
gt::tab_header(title = "Clinical Characteristics of HCC Patients Treated with TACE")
# Print the table
tbl_summary_data
# Create interactive table
DT::datatable(
unique_patients %>% select(all_of(available_cols)),
options = list(
pageLength = 10,
scrollX = TRUE,
dom = 'Blfrtip',
buttons = c('copy', 'csv', 'excel'),
columnDefs = list(
list(className = 'dt-center', targets = "_all")
)
),
filter = 'top',
class = 'cell-border stripe hover',
caption = htmltools::tags$caption(
style = 'caption-side: top; text-align: center; font-size: 16px; font-weight: bold; margin-top: 20px;',
'Interactive Table: HCC Patient Data'
)
)
} else {
cat("Clinical data columns not found in the expected format.")
}
} else {
cat("Required packages (gtsummary, DT) not available or data not properly loaded.")
}if (exists("HCC_Final") && requireNamespace("maftools", quietly = TRUE)) {
# Plot mutation summary
par(mfrow = c(1, 1))
tryCatch({
plotmafSummary(
maf = HCC_Final,
rmOutlier = TRUE,
dashboard = TRUE,
addStat = 'median',
showBarcodes = TRUE,
fs = 1,
color = fabcolors$Variant_Classification,
log_scale = FALSE,
textSize = 0.8,
titleSize = c(1, 1)
)
}, error = function(e) {
cat("Error in plotting MAF summary:", e$message, "\n")
cat("Reverting to simplified plot.\n")
# Simplified summary plot
plotmafSummary(
maf = HCC_Final,
rmOutlier = TRUE,
addStat = 'median'
)
})
}if (exists("HCC_Final_non_intron") && requireNamespace("maftools", quietly = TRUE)) {
# Plot mutation summary
par(mfrow = c(1, 1))
tryCatch({
# Create standard summary plot with larger text
plotmafSummary(
maf = HCC_Final_non_intron,
rmOutlier = TRUE,
dashboard = TRUE,
addStat = 'median',
showBarcodes = TRUE,
fs = 0.9, # Increase cex font size parameter for gene names
color = fabcolors$Variant_Classification,
log_scale = FALSE,
textSize = 1.2, # Increase text size for labels and annotations
titleSize = c(1.5, 1.3) # Increase title and subtitle sizes
)
# Additionally, create a separate bar plot specifically for the HCC genes of interest
par(mfrow = c(1, 1))
# Check which of your HCC genes actually have mutations in the dataset
genes_in_data <- HCC_genes[HCC_genes %in% unique(HCC_Final_non_intron@data$Hugo_Symbol)]
if(length(genes_in_data) > 0) {
# Generate a focused oncoplot just for your HCC genes
cat("\nGenerating focused plot for HCC genes of interest:\n")
# Set custom global plot parameters for larger text
par(cex.axis = 1.2, # Increase axis text size
cex.lab = 1.4, # Increase axis labels size
cex.main = 1.5) # Increase title size
# Use only the supported parameters
mafbarplot(
maf = HCC_Final_non_intron,
genes = HCC_genes, # Use your specific HCC genes list
fontSize = 0.9, # Increase font size for gene names and percentages
color = fabcolors$Variant_Classification,
showPct = TRUE
)
cat("Displayed mutations for these HCC genes of interest:\n")
cat(paste(genes_in_data, collapse = ", "), "\n")
} else {
cat("None of the specified HCC genes have mutations in this dataset.\n")
}
}, error = function(e) {
cat("Error in plotting:", e$message, "\n")
cat("Reverting to simplified plot.\n")
# Simplified summary plot
plotmafSummary(
maf = HCC_Final_non_intron,
rmOutlier = TRUE,
addStat = 'median',
textSize = 1.2 # Increase text size here too
)
# Try simplified gene-specific plot
tryCatch({
mafbarplot(
maf = HCC_Final_non_intron,
genes = HCC_genes,
fontSize = 1.3 # Increase font size
)
}, error = function(e2) {
cat("Could not generate HCC gene-specific plot:", e2$message, "\n")
})
})
}##
## Generating focused plot for HCC genes of interest:
## Displayed mutations for these HCC genes of interest:
## APC, ARID1A, ARID2, ATM, AXIN1, CDKN2A, CTNNB1, HNF1A, JAK1, KEAP1, LZTR1, PIK3CA, PTEN, RB1, SF3B1, SMARCA4, TERT, TP53
# After generating your plots, save them with higher resolution
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/HCC_mutation_summary.png", width = 12, height = 10, units = "in", res = 400) # Increased size and resolution
# Set global parameters for saved plot
par(cex.axis = 1.2, cex.lab = 1.4, cex.main = 1.5)
# Here we need to regenerate the plot to capture it in the PNG
plotmafSummary(
maf = HCC_Final_non_intron,
rmOutlier = TRUE,
dashboard = TRUE,
addStat = 'median',
showBarcodes = TRUE,
fs = 1.2, # Increased font size
color = fabcolors$Variant_Classification,
log_scale = FALSE,
textSize = 1.2, # Increased text size
titleSize = c(1.5, 1.3) # Increased title sizes
)
dev.off()## quartz_off_screen
## 2
# Save the HCC genes specific plot with higher resolution
if(exists("genes_in_data") && length(genes_in_data) > 0) {
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/HCC_genes_mutations.png", width = 12, height = 10, units = "in", res = 400) # Increased size and resolution
# Set global parameters for saved plot
par(cex.axis = 1.2, cex.lab = 1.4, cex.main = 1.5, mar = c(5, 10, 4, 12)) # Increased margins
mafbarplot(
maf = HCC_Final_non_intron,
genes = HCC_genes,
fontSize = 1.3, # Increased font size
color = fabcolors$Variant_Classification,
showPct = TRUE
)
# Add custom x-axis label with larger font
title(xlab = "Number of alterations", cex.lab = 1.4, font.lab = 2)
dev.off()
}## quartz_off_screen
## 2
if (exists("HCC_Final_non_synonymous") && requireNamespace("maftools", quietly = TRUE)) {
# Plot mutation summary
par(mfrow = c(1, 1))
tryCatch({
plotmafSummary(
maf = HCC_Final_non_synonymous,
rmOutlier = TRUE,
dashboard = TRUE,
addStat = 'median',
showBarcodes = TRUE,
fs = 1,
color = fabcolors$Variant_Classification,
log_scale = FALSE,
textSize = 0.8,
titleSize = c(1, 1)
)
}, error = function(e) {
cat("Error in plotting MAF summary:", e$message, "\n")
cat("Reverting to simplified plot.\n")
# Simplified summary plot
plotmafSummary(
maf = HCC_Final_non_synonymous,
rmOutlier = TRUE,
addStat = 'median'
)
})
}if (exists("HCC_Final_non_intron")) {
# Create directory for publication figures
dir.create("publication_figures", showWarnings = FALSE)
# Define professional color palettes with brighter colors
mutation_colors <- c(
"Missense_Mutation" = "#4285F4", # Blue
"Frame_Shift_Del" = "#EA4335", # Red
"Nonsense_Mutation" = "#FBBC05", # Yellow
"Splice_Site" = "#34A853", # Green
"In_Frame_Del" = "#FF9900", # Orange
"Frame_Shift_Ins" = "#9C27B0", # Purple
"In_Frame_Ins" = "#00ACC1", # Cyan
"3'UTR" = "#8E24AA", # Bright Purple
"5'UTR" = "#FFD600", # Bright Yellow
"5'Flank" = "#795548", # Brown
"Silent" = "#BDBDBD" # Gray
)
variant_colors <- c(
"SNP" = "#FF5252", # Red
"DEL" = "#448AFF", # Blue
"INS" = "#66BB6A", # Green
"DNP" = "#FFC107", # Amber
"TNP" = "#7E57C2", # Deep Purple
"ONP" = "#26A69A" # Teal
)
snv_colors <- c(
"C>A" = "#27AE60", # Green
"C>G" = "#3498DB", # Blue
"C>T" = "#E74C3C", # Red
"T>A" = "#F39C12", # Orange
"T>C" = "#9B59B6", # Purple
"T>G" = "#2C3E50" # Dark Blue
)
# -------------------------------------------------------------------------------
# 1. VARIANT CLASSIFICATION PLOT
# -------------------------------------------------------------------------------
tryCatch({
# Extract variant classification data
vc_data <- as.data.frame(table(HCC_Final_non_intron@data$Variant_Classification))
colnames(vc_data) <- c("Variant_Classification", "Frequency")
vc_data <- vc_data[order(-vc_data$Frequency),]
# Set up colors
vc_colors <- rep("#1F77B4", nrow(vc_data)) # Default blue color
for (i in 1:nrow(vc_data)) {
if (vc_data$Variant_Classification[i] %in% names(mutation_colors)) {
vc_colors[i] <- mutation_colors[as.character(vc_data$Variant_Classification[i])]
}
}
# Create the plot
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/Variant_Classification.png", width = 8, height = 6, units = "in", res = 300, bg = "white")
# Set up margins and text parameters
par(mar = c(5, 12, 4, 4), mgp = c(3, 0.7, 0),
cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
family = "sans")
# Calculate x-axis limit with some padding
x_max <- max(vc_data$Frequency) * 1.15
# Create horizontal barplot with no borders
barplot_result <- barplot(vc_data$Frequency,
horiz = TRUE,
names.arg = FALSE,
col = vc_colors,
border = NA,
xlim = c(0, x_max),
xlab = "",
main = "Variant Classification",
axes = FALSE)
# Add custom x-axis with gridlines
axis(1, at = pretty(c(0, max(vc_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
abline(v = pretty(c(0, max(vc_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
# Add y-axis labels aligned right
text(x = -max(vc_data$Frequency) * 0.05,
y = barplot_result,
labels = vc_data$Variant_Classification,
xpd = TRUE,
adj = 1,
cex = 0.9)
# Add count labels at the end of each bar
text(x = vc_data$Frequency + max(vc_data$Frequency) * 0.02,
y = barplot_result,
labels = vc_data$Frequency,
adj = 0,
cex = 0.8)
# Add x-axis label
mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
dev.off()
cat("Created Variant_Classification.png\n")
}, error = function(e) {
cat("Error creating Variant Classification plot:", e$message, "\n")
})
# -------------------------------------------------------------------------------
# 2. VARIANT TYPE PLOT
# -------------------------------------------------------------------------------
tryCatch({
# Extract variant type data
vt_data <- as.data.frame(table(HCC_Final_non_intron@data$Variant_Type))
colnames(vt_data) <- c("Variant_Type", "Frequency")
vt_data <- vt_data[order(-vt_data$Frequency),]
# Set up colors
vt_colors <- rep("#1F77B4", nrow(vt_data)) # Default blue color
for (i in 1:nrow(vt_data)) {
if (vt_data$Variant_Type[i] %in% names(variant_colors)) {
vt_colors[i] <- variant_colors[as.character(vt_data$Variant_Type[i])]
}
}
# Create the plot
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/Variant_Type.png", width = 8, height = 6, units = "in", res = 300, bg = "white")
# Set up margins and text parameters
par(mar = c(5, 10, 4, 4), mgp = c(3, 0.7, 0),
cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
family = "sans")
# Calculate x-axis limit with some padding
x_max <- max(vt_data$Frequency) * 1.15
# Create horizontal barplot with no borders
barplot_result <- barplot(vt_data$Frequency,
horiz = TRUE,
names.arg = FALSE,
col = vt_colors,
border = NA,
xlim = c(0, x_max),
xlab = "",
main = "Variant Type",
axes = FALSE)
# Add custom x-axis with gridlines
axis(1, at = pretty(c(0, max(vt_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
abline(v = pretty(c(0, max(vt_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
# Add y-axis labels aligned right
text(x = -max(vt_data$Frequency) * 0.05,
y = barplot_result,
labels = vt_data$Variant_Type,
xpd = TRUE,
adj = 1,
cex = 0.9)
# Add count labels at the end of each bar
text(x = vt_data$Frequency + max(vt_data$Frequency) * 0.02,
y = barplot_result,
labels = vt_data$Frequency,
adj = 0,
cex = 0.8)
# Add x-axis label
mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
dev.off()
cat("Created Variant_Type.png\n")
}, error = function(e) {
cat("Error creating Variant Type plot:", e$message, "\n")
})
# -------------------------------------------------------------------------------
# 3. SNV CLASS PLOT
# -------------------------------------------------------------------------------
tryCatch({
# Try to extract SNV data from Variant_Type == "SNP"
snp_data <- HCC_Final_non_intron@data[HCC_Final_non_intron@data$Variant_Type == "SNP", ]
if (nrow(snp_data) > 0 && all(c("Reference_Allele", "Tumor_Seq_Allele2") %in% colnames(snp_data))) {
# Create SNV classification
snp_data$SNV_Class <- paste0(snp_data$Reference_Allele, ">", snp_data$Tumor_Seq_Allele2)
# Keep only standard classes
std_classes <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G")
snv_counts <- table(factor(snp_data$SNV_Class, levels = std_classes))
# Create data frame from counts
snv_data <- data.frame(
SNV_Class = names(snv_counts),
Frequency = as.numeric(snv_counts)
)
snv_data <- snv_data[snv_data$Frequency > 0, ]
snv_data <- snv_data[order(-snv_data$Frequency), ]
# Set up colors
plot_colors <- rep("#1F77B4", nrow(snv_data)) # Default blue color
for (i in 1:nrow(snv_data)) {
if (snv_data$SNV_Class[i] %in% names(snv_colors)) {
plot_colors[i] <- snv_colors[as.character(snv_data$SNV_Class[i])]
}
}
# Create the plot
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/SNV_Class.png", width = 8, height = 6, units = "in", res = 300, bg = "white")
# Set up margins and text parameters
par(mar = c(5, 10, 4, 4), mgp = c(3, 0.7, 0),
cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
family = "sans")
# Calculate x-axis limit with some padding
x_max <- max(snv_data$Frequency) * 1.15
# Create horizontal barplot with no borders
barplot_result <- barplot(snv_data$Frequency,
horiz = TRUE,
names.arg = FALSE,
col = plot_colors,
border = NA,
xlim = c(0, x_max),
xlab = "",
main = "SNV Class",
axes = FALSE)
# Add custom x-axis with gridlines
axis(1, at = pretty(c(0, max(snv_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
abline(v = pretty(c(0, max(snv_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
# Add y-axis labels aligned right
text(x = -max(snv_data$Frequency) * 0.05,
y = barplot_result,
labels = snv_data$SNV_Class,
xpd = TRUE,
adj = 1,
cex = 0.9)
# Add count labels at the end of each bar
text(x = snv_data$Frequency + max(snv_data$Frequency) * 0.02,
y = barplot_result,
labels = snv_data$Frequency,
adj = 0,
cex = 0.8)
# Add x-axis label
mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
dev.off()
cat("Created SNV_Class.png\n")
} else {
# Try using maftools titv if available
if ("titv" %in% ls(envir = asNamespace("maftools"), all.names = TRUE)) {
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/SNV_Class.png", width = 8, height = 6, units = "in", res = 300, bg = "white")
titv_result <- titv(maf = HCC_Final_non_intron, plot = FALSE)
plotTiTv(res = titv_result, fontSize = 1.2, mainLabelSize = 1.5)
dev.off()
cat("Created SNV_Class.png using maftools' titv function\n")
} else {
cat("Could not generate SNV Class plot: insufficient data\n")
}
}
}, error = function(e) {
cat("Error creating SNV Class plot:", e$message, "\n")
# Fallback to simpler approach
tryCatch({
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/SNV_Class.png", width = 8, height = 6, units = "in", res = 300, bg = "white")
par(mar = c(5, 5, 4, 2))
plotTiTv(res = titv(maf = HCC_Final_non_intron, returnAll = TRUE), fontSize = 1.2)
dev.off()
}, error = function(e2) {
cat("Could not create SNV Class plot using alternative method:", e2$message, "\n")
})
})
# -------------------------------------------------------------------------------
# 4. COMBINED PLOT WITH PANEL LABELS
# -------------------------------------------------------------------------------
tryCatch({
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/Combined_Mutation_Summary.png", width = 18, height = 6, units = "in", res = 300, bg = "white")
# Create a 1x3 layout
layout(matrix(1:3, nrow = 1), widths = c(1, 1, 1))
# 1. Variant Classification
vc_data <- as.data.frame(table(HCC_Final_non_intron@data$Variant_Classification))
colnames(vc_data) <- c("Variant_Classification", "Frequency")
vc_data <- vc_data[order(-vc_data$Frequency),]
vc_colors <- rep("#1F77B4", nrow(vc_data)) # Default blue color
for (i in 1:nrow(vc_data)) {
if (vc_data$Variant_Classification[i] %in% names(mutation_colors)) {
vc_colors[i] <- mutation_colors[as.character(vc_data$Variant_Classification[i])]
}
}
# Set up margins and text parameters
par(mar = c(5, 12, 4, 2), mgp = c(3, 0.7, 0),
cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
family = "sans")
# Calculate x-axis limit with some padding
x_max <- max(vc_data$Frequency) * 1.15
# Create horizontal barplot with no borders
barplot_result <- barplot(vc_data$Frequency,
horiz = TRUE,
names.arg = FALSE,
col = vc_colors,
border = NA,
xlim = c(0, x_max),
xlab = "",
main = "Variant Classification",
axes = FALSE)
# Add custom x-axis with gridlines
axis(1, at = pretty(c(0, max(vc_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
abline(v = pretty(c(0, max(vc_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
# Add y-axis labels aligned right
text(x = -max(vc_data$Frequency) * 0.05,
y = barplot_result,
labels = vc_data$Variant_Classification,
xpd = TRUE,
adj = 1,
cex = 0.9)
# Add count labels at the end of each bar
text(x = vc_data$Frequency + max(vc_data$Frequency) * 0.02,
y = barplot_result,
labels = vc_data$Frequency,
adj = 0,
cex = 0.8)
# Add x-axis label
mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
# Add panel label
text(x = -max(vc_data$Frequency) * 0.1, y = max(barplot_result) * 1.15,
labels = "A", font = 2, cex = 1.5, xpd = TRUE)
# 2. Variant Type
vt_data <- as.data.frame(table(HCC_Final_non_intron@data$Variant_Type))
colnames(vt_data) <- c("Variant_Type", "Frequency")
vt_data <- vt_data[order(-vt_data$Frequency),]
vt_colors <- rep("#1F77B4", nrow(vt_data)) # Default blue color
for (i in 1:nrow(vt_data)) {
if (vt_data$Variant_Type[i] %in% names(variant_colors)) {
vt_colors[i] <- variant_colors[as.character(vt_data$Variant_Type[i])]
}
}
# Set up margins and text parameters
par(mar = c(5, 10, 4, 2), mgp = c(3, 0.7, 0),
cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
family = "sans")
# Calculate x-axis limit with some padding
x_max <- max(vt_data$Frequency) * 1.15
# Create horizontal barplot with no borders
barplot_result <- barplot(vt_data$Frequency,
horiz = TRUE,
names.arg = FALSE,
col = vt_colors,
border = NA,
xlim = c(0, x_max),
xlab = "",
main = "Variant Type",
axes = FALSE)
# Add custom x-axis with gridlines
axis(1, at = pretty(c(0, max(vt_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
abline(v = pretty(c(0, max(vt_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
# Add y-axis labels aligned right
text(x = -max(vt_data$Frequency) * 0.05,
y = barplot_result,
labels = vt_data$Variant_Type,
xpd = TRUE,
adj = 1,
cex = 0.9)
# Add count labels at the end of each bar
text(x = vt_data$Frequency + max(vt_data$Frequency) * 0.02,
y = barplot_result,
labels = vt_data$Frequency,
adj = 0,
cex = 0.8)
# Add x-axis label
mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
# Add panel label
text(x = -max(vt_data$Frequency) * 0.1, y = max(barplot_result) * 1.15,
labels = "B", font = 2, cex = 1.5, xpd = TRUE)
# 3. SNV Class
snp_data <- HCC_Final_non_intron@data[HCC_Final_non_intron@data$Variant_Type == "SNP", ]
if (nrow(snp_data) > 0 && all(c("Reference_Allele", "Tumor_Seq_Allele2") %in% colnames(snp_data))) {
snp_data$SNV_Class <- paste0(snp_data$Reference_Allele, ">", snp_data$Tumor_Seq_Allele2)
std_classes <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G")
snv_counts <- table(factor(snp_data$SNV_Class, levels = std_classes))
snv_data <- data.frame(
SNV_Class = names(snv_counts),
Frequency = as.numeric(snv_counts)
)
snv_data <- snv_data[snv_data$Frequency > 0, ]
snv_data <- snv_data[order(-snv_data$Frequency), ]
plot_colors <- rep("#1F77B4", nrow(snv_data)) # Default blue color
for (i in 1:nrow(snv_data)) {
if (snv_data$SNV_Class[i] %in% names(snv_colors)) {
plot_colors[i] <- snv_colors[as.character(snv_data$SNV_Class[i])]
}
}
# Set up margins and text parameters
par(mar = c(5, 10, 4, 2), mgp = c(3, 0.7, 0),
cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
family = "sans")
# Calculate x-axis limit with some padding
x_max <- max(snv_data$Frequency) * 1.15
# Create horizontal barplot with no borders
barplot_result <- barplot(snv_data$Frequency,
horiz = TRUE,
names.arg = FALSE,
col = plot_colors,
border = NA,
xlim = c(0, x_max),
xlab = "",
main = "SNV Class",
axes = FALSE)
# Add custom x-axis with gridlines
axis(1, at = pretty(c(0, max(snv_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
abline(v = pretty(c(0, max(snv_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
# Add y-axis labels aligned right
text(x = -max(snv_data$Frequency) * 0.05,
y = barplot_result,
labels = snv_data$SNV_Class,
xpd = TRUE,
adj = 1,
cex = 0.9)
# Add count labels at the end of each bar
text(x = snv_data$Frequency + max(snv_data$Frequency) * 0.02,
y = barplot_result,
labels = snv_data$Frequency,
adj = 0,
cex = 0.8)
# Add x-axis label
mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
# Add panel label
text(x = -max(snv_data$Frequency) * 0.1, y = max(barplot_result) * 1.15,
labels = "C", font = 2, cex = 1.5, xpd = TRUE)
} else {
# Just leave this panel blank if we can't create SNV plot
par(mar = c(5, 10, 4, 2))
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "", main = "SNV Class")
text(1, 1, "SNV Class data not available", cex = 1.2)
# Add panel label
text(0.5, 1.8, labels = "C", font = 2, cex = 1.5, xpd = TRUE)
}
dev.off()
cat("Created Combined_Mutation_Summary.png\n")
}, error = function(e) {
cat("Error creating combined plot:", e$message, "\n")
})
# -------------------------------------------------------------------------------
# 5. TOP MUTATED GENES PLOT
# -------------------------------------------------------------------------------
tryCatch({
# Determine which genes to use
top_genes <- NULL
# If HCC_genes exists, use those genes
if (exists("HCC_genes") && length(HCC_genes) > 0) {
genes_in_data <- HCC_genes[HCC_genes %in% unique(HCC_Final_non_intron@data$Hugo_Symbol)]
if (length(genes_in_data) > 0) {
top_genes <- genes_in_data
}
}
# If no specific HCC genes or none found, use top 10 most mutated
if (is.null(top_genes)) {
gene_counts <- table(HCC_Final_non_intron@data$Hugo_Symbol)
gene_counts <- sort(gene_counts, decreasing = TRUE)
top_genes <- names(gene_counts)[1:min(10, length(gene_counts))]
}
# Count mutations per gene and get mutation types
gene_counts <- numeric(length(top_genes))
gene_mut_types <- list()
for (i in 1:length(top_genes)) {
gene <- top_genes[i]
gene_data <- HCC_Final_non_intron@data[HCC_Final_non_intron@data$Hugo_Symbol == gene, ]
gene_counts[i] <- nrow(gene_data)
# Count mutation types
if (nrow(gene_data) > 0) {
mut_types <- table(gene_data$Variant_Classification)
gene_mut_types[[gene]] <- mut_types
}
}
# Order genes by mutation count
gene_order <- order(-gene_counts)
gene_counts <- gene_counts[gene_order]
top_genes <- top_genes[gene_order]
# Calculate percentages
total_samples <- length(unique(HCC_Final_non_intron@data$Tumor_Sample_Barcode))
gene_percentages <- paste0(round(gene_counts / total_samples * 100, 0), "%")
# Create the plot
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/Top_Mutated_Genes.png", width = 10, height = 8, units = "in", res = 300, bg = "white")
# Set up the plot margins for horizontal bars
par(mar = c(5, 8, 4, 4), mgp = c(3, 0.7, 0),
cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
family = "sans")
# Calculate x-axis limit with some padding
x_max <- max(gene_counts) * 1.3
# Create horizontal barplot
barplot_result <- barplot(gene_counts,
horiz = TRUE,
names.arg = FALSE,
col = "#4682B4",
border = NA,
xlim = c(0, x_max),
axes = FALSE,
main = "Top 10 mutated genes")
# Add custom x-axis with gridlines
axis(1, at = pretty(c(0, max(gene_counts))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
abline(v = pretty(c(0, max(gene_counts))), col = "lightgray", lty = "dotted", lwd = 0.7)
# Add y-axis labels (gene names in italics)
text(x = -max(gene_counts) * 0.1,
y = barplot_result,
labels = top_genes,
xpd = TRUE,
adj = 1,
cex = 0.9,
font = 3) # 3 = italic
# Add percentage labels
text(x = gene_counts + max(gene_counts) * 0.05,
y = barplot_result,
labels = gene_percentages,
xpd = TRUE,
adj = 0,
cex = 0.9,
font = 2) # 2 = bold
# Add x-axis label
mtext("Number of mutations", side = 1, line = 2.5, cex = 0.9)
dev.off()
cat("Created Top_Mutated_Genes.png\n")
# Try to create a stacked bar version if we have mutation type data
if (length(gene_mut_types) > 0) {
tryCatch({
# Set up mutation type colors
mut_type_colors <- mutation_colors
# Create the plot
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/Top_Mutated_Genes_Stacked.png", width = 12, height = 8, units = "in", res = 300, bg = "white")
# Set up the plot area
par(mar = c(5, 9, 4, 10), mgp = c(3, 0.7, 0),
cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
family = "sans")
# Create empty plot area
plot(0, 0, type = "n",
xlim = c(0, max(gene_counts) * 1.3),
ylim = c(0.5, length(top_genes) + 0.5),
xlab = "", ylab = "", axes = FALSE,
main = "Top mutated genes by mutation type")
# Add custom x-axis with gridlines
axis(1, at = pretty(c(0, max(gene_counts))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
abline(v = pretty(c(0, max(gene_counts))), col = "lightgray", lty = "dotted", lwd = 0.7)
# Add x-axis label
mtext("Number of mutations", side = 1, line = 2.5, cex = 0.9)
# Add gene names on y-axis
text(x = -max(gene_counts) * 0.05,
y = length(top_genes):1,
labels = top_genes,
xpd = TRUE,
adj = 1,
cex = 0.9,
font = 3) # 3 = italic
# Add percentage labels
text(x = gene_counts + max(gene_counts) * 0.05,
y = length(top_genes):1,
labels = gene_percentages,
xpd = TRUE,
adj = 0,
cex = 0.9,
font = 2) # 2 = bold
# Create stacked bars
for (i in 1:length(top_genes)) {
gene <- top_genes[i]
if (gene %in% names(gene_mut_types)) {
mut_counts <- gene_mut_types[[gene]]
# Start position for each segment
x_start <- 0
# Sort mutation types
mut_counts <- sort(mut_counts, decreasing = TRUE)
for (mut_type in names(mut_counts)) {
count <- mut_counts[mut_type]
# Choose color
if (mut_type %in% names(mut_type_colors)) {
color <- mut_type_colors[mut_type]
} else {
color <- "#CCCCCC" # Gray for unknown types
}
# Draw rectangle for this mutation type
rect(x_start, length(top_genes) - i + 0.7,
x_start + count, length(top_genes) - i + 1.3,
col = color, border = NA)
# Move x_start for next segment
x_start <- x_start + count
}
}
}
# Add legend
all_mut_types <- unique(unlist(lapply(gene_mut_types, names)))
legend_mut_types <- intersect(all_mut_types, names(mut_type_colors))
if (length(legend_mut_types) > 0) {
legend_colors <- mut_type_colors[legend_mut_types]
legend("topright",
legend = legend_mut_types,
fill = legend_colors,
border = NA,
bty = "n",
cex = 0.8,
xpd = TRUE,
inset = c(-0.25, 0))
}
dev.off()
cat("Created Top_Mutated_Genes_Stacked.png\n")
}, error = function(e) {
cat("Error creating stacked bar plot:", e$message, "\n")
})
}
}, error = function(e) {
cat("Error creating Top Mutated Genes plot:", e$message, "\n")
})
cat("\nAll annotated plots have been saved to the 'publication_figures' directory.\n")
}## Created Variant_Classification.png
## Created Variant_Type.png
## Created SNV_Class.png
## Created Combined_Mutation_Summary.png
## Created Top_Mutated_Genes.png
## Created Top_Mutated_Genes_Stacked.png
##
## All annotated plots have been saved to the 'publication_figures' directory.
if (exists("HCC_Final_non_intron") && exists("HCC_genes")) {
# Generate oncoplot for HCC genes
tryCatch({
oncoplot(
maf = HCC_Final_non_intron,
genes = HCC_genes,
clinicalFeatures = c('Aetiology', 'BCLC', 'Cirrhosis'),
sortByAnnotation = TRUE,
annotationColor = NULL,
removeNonMutated = FALSE,
showTumorSampleBarcodes = TRUE,
fontSize = 0.7,
SampleNamefontSize = 0.8,
legendFontSize = 1,
annotationFontSize = 0.8,
showTitle = TRUE,
titleText = "Mutation Profile of Key HCC Genes",
showPct = TRUE
)
}, error = function(e) {
cat("Error in generating oncoplot:", e$message, "\n")
cat("Trying with fewer options.\n")
# Simplified oncoplot
oncoplot(
maf = HCC_Final_non_intron,
genes = HCC_genes,
showTumorSampleBarcodes = TRUE
)
})
}# ------------------------------------------------------------------
# Extended oncoplot – top 18 genes with detailed clinical tracks
# ------------------------------------------------------------------
if (exists("HCC_Final_non_intron")) {
#--- make sure the clinical annotation names really exist -----------
anno_cols <- colnames(HCC_Final_non_intron@clinical.data)
# Use the long name only if it is available, otherwise fall back to BCLC
clin_feats <- c(
'Aetiology',
'Condition1',
if ('Barcelona_Clinic_Liver_Cancer' %in% anno_cols) {
'Barcelona_Clinic_Liver_Cancer'
} else if ('BCLC' %in% anno_cols) {
'BCLC'
} else {
NULL
}
)
#--------------------------------------------------------------------
tryCatch({
oncoplot(
maf = HCC_Final_non_intron,
top = 18,
genes = c("APC", "ARID1A", "ARID2", "ATM", "AXIN1", "CDKN2A",
"CTNNB1", "HNF1A", "JAK1", "KEAP1", "LZTR1",
"PIK3CA", "PTEN", "RB1", "SF3B1", "SMARCA4",
"TERT", "TP53"),
altered = FALSE,
drawRowBar = TRUE,
drawColBar = TRUE,
includeColBarCN = TRUE,
clinicalFeatures = clin_feats,
annotationColor = NULL,
draw_titv = FALSE,
showTumorSampleBarcodes = TRUE,
barcodeSrt = 90,
gene_mar = 5,
anno_height = 1,
legend_height = 4,
sortByAnnotation = FALSE,
groupAnnotationBySize = TRUE,
sortByMutation = FALSE,
keepGeneOrder = FALSE,
GeneOrderSort = TRUE,
removeNonMutated = TRUE,
bgCol = "#CCCCCC",
borderCol = "white",
fontSize = 0.7,
SampleNamefontSize = 0.9,
titleFontSize = 1.5,
legendFontSize = 1.2,
annotationFontSize = 1.2,
showTitle = TRUE,
titleText = "Top 18 Mutated Genes – Detailed Oncoplot",
showPct = TRUE
)
}, error = function(e) {
message("Oncoplot (top 18) could not be generated: ", e$message)
})
}#plot titv summary
plotTiTv(
res = laml.titv,
plotType = "both",
sampleOrder = HCC_Final_non_intron@clinical.data$Tumor_Sample_Barcode,
color = NULL,
showBarcodes = TRUE,
textSize = 0.8,
baseFontSize = 1,
axisTextSize = c(1, 1),
plotNotch = FALSE
)if (exists("HCC_Final_non_intron")) {
# Create lollipop plot for TP53
if ("TP53" %in% unique(HCC_Final_non_intron@data$Hugo_Symbol)) {
tryCatch({
lollipopPlot(
maf = HCC_Final_non_intron,
gene = "TP53",
AACol = "Protein_Change",
showMutationRate = TRUE,
labelPos = "all"
)
}, error = function(e) {
cat("Error in generating TP53 lollipop plot:", e$message, "\n")
cat("TP53 mutations are present but plot could not be generated.\n")
})
} else {
cat("No TP53 mutations found in this dataset.\n")
}
}## HGNC refseq.ID protein.ID aa.length
## <char> <char> <char> <num>
## 1: TP53 NM_000546 NP_000537 393
## 2: TP53 NM_001126112 NP_001119584 393
## 3: TP53 NM_001126113 NP_001119585 346
## 4: TP53 NM_001126114 NP_001119586 341
## 5: TP53 NM_001126115 NP_001119587 261
## 6: TP53 NM_001126116 NP_001119588 209
## 7: TP53 NM_001126117 NP_001119589 214
## 8: TP53 NM_001126118 NP_001119590 354
if (exists("HCC_Final_non_intron")) {
# Create lollipop plot for CTNNB1
if ("CTNNB1" %in% unique(HCC_Final_non_intron@data$Hugo_Symbol)) {
tryCatch({
lollipopPlot(
maf = HCC_Final_non_intron,
gene = "CTNNB1",
AACol = "Protein_Change",
showMutationRate = TRUE,
labelPos = "all"
)
}, error = function(e) {
cat("Error in generating CTNNB1 lollipop plot:", e$message, "\n")
cat("CTNNB1 mutations are present but plot could not be generated.\n")
})
} else {
cat("No CTNNB1 mutations found in this dataset.\n")
}
}## HGNC refseq.ID protein.ID aa.length
## <char> <char> <char> <num>
## 1: CTNNB1 NM_001098209 NP_001091679 781
## 2: CTNNB1 NM_001098210 NP_001091680 781
## 3: CTNNB1 NM_001904 NP_001895 781
if (exists("HCC_Final_non_intron")) {
# Create lollipop plot for TERT
if ("TERT" %in% unique(HCC_Final_non_intron@data$Hugo_Symbol)) {
tryCatch({
lollipopPlot(
maf = HCC_Final_non_intron,
gene = "TERT",
AACol = "Protein_Change",
showMutationRate = TRUE,
labelPos = "all"
)
}, error = function(e) {
cat("Error in generating TERT lollipop plot:", e$message, "\n")
cat("TERT mutations are present but plot could not be generated.\n")
})
} else {
cat("No TERT mutations found in this dataset.\n")
}
}## HGNC refseq.ID protein.ID aa.length
## <char> <char> <char> <num>
## 1: TERT NM_001193376 NP_001180305 1069
## 2: TERT NM_198253 NP_937983 1132
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/somatic_interactions.png", width = 12, height = 9, units = "in", res = 300)
par(mar = c(5, 8, 4, 2)) # Increase left margin (2nd value)
somaticInteractions(
maf = HCC_Final_non_intron,
genes = HCC_genes,
pvalue = c(0.05, 0.1),
leftMar = 8,
topMar = 8,# Default is 4, increase to 8 or more
fontSize = 1,
showSigSymbols = TRUE,
returnAll = FALSE # Make sure this is FALSE to get a plot instead of data
)## quartz_off_screen
## 2
# Define the list of HCC genes to include
HCC_genes <- c(
"APC", "ARID1A", "ARID2", "ATM", "AXIN1", "CDKN2A", "CTNNB1", "HNF1A",
"JAK1", "KEAP1", "LZTR1", "PIK3CA", "PTEN", "RB1", "SF3B1",
"SMARCA4", "TERT", "TP53"
)
# Define the extract_vaf_data function
extract_vaf_data <- function(maf_object, group_label) {
# Extract the variants data from the MAF object
variants <- maf_object@data
# Select relevant columns and add group label
result <- variants %>%
select(Hugo_Symbol, tumor_f) %>%
mutate(Group = group_label)
return(result)
}
# Extract and process data as before
pre_data <- extract_vaf_data(pre_tace_samples, "Pre-TACE")
post_data <- extract_vaf_data(post_tace_samples, "Post-TACE")
# Combine the data
combined_data <- bind_rows(pre_data, post_data)
# Filter out NA values and restrict to only the specified HCC genes
combined_data <- combined_data %>%
filter(!is.na(tumor_f)) %>%
filter(Hugo_Symbol != "NA" & !is.na(Hugo_Symbol) & Hugo_Symbol != "") %>%
filter(Hugo_Symbol %in% HCC_genes) # Add this line to filter for HCC genes only
# Important: Set factor levels to control the order
combined_data$Group <- factor(combined_data$Group, levels = c("Pre-TACE", "Post-TACE"))
# Set gene order to match the original vector order
combined_data$Hugo_Symbol <- factor(combined_data$Hugo_Symbol, levels = HCC_genes)
# Create side-by-side boxplots with Pre-TACE first
vaf_plot_ordered <- ggplot(combined_data, aes(x = Hugo_Symbol, y = tumor_f, fill = Group)) +
geom_boxplot(position = position_dodge(width = 0.8), width = 0.7, outlier.size = 0.5, na.rm = TRUE) +
scale_fill_manual(values = c("Pre-TACE" = "#3498db", "Post-TACE" = "#e74c3c")) +
labs(
title = "VAF Comparison in HCC-Related Genes",
x = "Gene",
y = "Variant Allele Frequencies (VAF)",
fill = ""
) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 9),
axis.text.y = element_text(size = 10),
legend.position = "top",
legend.title = element_blank(),
plot.title = element_text(hjust = 0.5, face = "bold")
)
# Save the plot
ggsave("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/pre_post_vaf_comparison_hcc_genes.png", vaf_plot_ordered, width = 10, height = 6)
# For the stacked vertical panels version, the same factor setting ensures correct order
vaf_plot_stacked <- ggplot(combined_data, aes(x = Hugo_Symbol, y = tumor_f, fill = Group)) +
geom_boxplot(outlier.size = 0.5) +
scale_fill_manual(values = c("Pre-TACE" = "#3498db", "Post-TACE" = "#e74c3c")) +
facet_grid(Group ~ ., scales = "free_y") + # Pre-TACE will now appear on top
labs(
title = "VAF Comparison in HCC-Related Genes",
x = "Gene",
y = "Variant Allele Frequencies (VAF)",
fill = ""
) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 9),
axis.text.y = element_text(size = 10),
strip.text = element_text(size = 12, face = "bold"),
legend.position = "none",
plot.title = element_text(hjust = 0.5, face = "bold")
)
# Save the plots
ggsave("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/vaf_comparison_stacked_panels_hcc_genes.png", vaf_plot_stacked, width = 10, height = 8)
print(vaf_plot_ordered)# Enhanced wrapper function that completely rewrites the forestPlot function
# but removes sample counts from the title and shows actual p-values as decimals
forestPlot_noSamples <- function(mafCompareRes, pVal = 0.05, fdr = NULL,
color=c('maroon','royalblue'),
geneFontSize = 0.8, titleSize = 1.2, lineWidth = 1){
if(is.null(fdr)){
m.sigs = mafCompareRes$results[pval < pVal]
}else{
m.sigs = mafCompareRes$results[adjPval < fdr]
}
m1Name = mafCompareRes$SampleSummary[1, Cohort]
m2Name = mafCompareRes$SampleSummary[2, Cohort]
m1.sampleSize = mafCompareRes$SampleSummary[1, SampleSize]
m2.sampleSize = mafCompareRes$SampleSummary[2, SampleSize]
# Handle color parameters
if (length(color)==1){
vc_col = c(color,color)
}else if(length(color==2)){
vc_col = color
}else{
stop('colors length must be less equal than 2')
}
if (is.null(names(vc_col))){
names(vc_col)=c(m2Name,m1Name)
}else if (!(m1Name %in% names(vc_col)) && !(m2Name %in% names(vc_col))){
stop(paste0('\ninput named vector [color] must contain both group name, \nwhich should be like c(',
m1Name,', ', m2Name,') but now are ',list(names(vc_col)),'\n'))
}else if (!(m1Name %in% names(vc_col)) || !(m2Name %in% names(vc_col))){
stop(paste0('\nif you pass [color] with named vector, then both of the names matching group names must be Explicitly declared,\n',
'which should be like c(',
m1Name,', ', m2Name,') but now are ',list(names(vc_col)),'\n'))
}
if(nrow(m.sigs) < 1){
stop('No differetially mutated genes found !')
}
m.sigs$Hugo_Symbol = factor(x = m.sigs$Hugo_Symbol, levels = rev(m.sigs$Hugo_Symbol))
m.sigs$or_new = ifelse(test = m.sigs$or > 3, yes = 3, no = m.sigs$or)
m.sigs$upper = ifelse(test = m.sigs$ci.up > 3, yes = 3, no = m.sigs$ci.up)
m.sigs$lower = ifelse(test = m.sigs$ci.low > 3, yes = 3, no = m.sigs$ci.low)
#Reverse by significance
m.sigs$pos = rev(1:nrow(m.sigs))
m.sigs = m.sigs[order(pos)]
m.sigs$pos = 1:nrow(m.sigs)
xlims = c(0, 4)
ylims = c(0.75, nrow(m.sigs))
graphics::layout(mat = matrix(c(1, 2, 3, 4, 5, 6, 6, 6, 6, 6), byrow = TRUE, ncol = 5, nrow = 2), widths = c(4, 1, 1), heights = c(6, 1.2))
par(mar = c(3, 1, 3, 5))
plot(NA, xlim = xlims, ylim = ylims, axes= FALSE, xlab = NA, ylab = NA)
apply(m.sigs[,.(or, ci.up, ci.low, ci.up, or_new, upper, lower, pos)], 1, function(x){
p = x[5]; u = x[6]; u_orig = x[2]; l = x[7]; l_orig = x[3]; ypos = x[8]
if (p<1){
linecolor = vc_col[m2Name]
}else if(p>1){
linecolor = vc_col[m1Name]
}else{
linecolor = 'black'
}
points(x = p, y = ypos, pch = 16, cex = 1.1*(lineWidth))
segments(x0 = l, y0 = ypos, x1 = u, y1 = ypos, lwd = lineWidth,col = linecolor)
if(u_orig >3){
segments(x0 = 3, y0 = ypos, x1 = 3.25, y1 = ypos, lwd = lineWidth,col=linecolor)
points(x = 3.25, y = ypos, pch = ">", cex = 1.1*(lineWidth))
}
if(l_orig >3){
segments(x0 = 3, y0 = ypos, x1 = 3.25, y1 = ypos, lwd = lineWidth,col=linecolor)
points(x = 3.25, y = ypos, pch = ">", cex = 1.1*(lineWidth))
}
})
abline(v = 1, lty = 2, col = "gray", xpd = FALSE)
axis(side = 1, at = 0:3, labels = c(0:3), font = 1, pos = 0.5, cex.axis = 1.3)
mtext(text = m.sigs$Hugo_Symbol, side = 4, line = 0.2, at = 1:nrow(m.sigs),
font = 3, las= 2, cex = geneFontSize, adj = 0)
# KEY CHANGE: Remove sample counts from title
mtitle = paste(m1Name, ' Vs. ', m2Name, sep='')
title(main = mtitle, font = 1, adj = 0, cex.main = titleSize)
# Plot annotation columns
par(mar = c(3, 0, 3, 0))
plot(rep(0, nrow(m.sigs)), 1:nrow(m.sigs), xlim = c(0, 1), axes = FALSE,
pch = NA, xlab = "", ylab = "", ylim = ylims)
text(x = 0.5, y = 1:nrow(m.sigs), labels = as.numeric(unlist(m.sigs[,3])),
adj = 0, font = 1, cex = 1.4*(geneFontSize))
title(main = m2Name, cex.main = titleSize)
par(mar = c(3, 0, 3, 0))
plot(rep(0, nrow(m.sigs)), 1:nrow(m.sigs), xlim = c(0, 1), axes = FALSE,
pch = NA, xlab = "", ylab = "", ylim = ylims)
text(x = 0.5, y = 1:nrow(m.sigs), labels = as.numeric(unlist(m.sigs[,2])),
adj = 0, font = 1, cex = 1.4*(geneFontSize))
title(main = m1Name, cex.main = titleSize)
par(mar = c(3, 0, 3, 0))
plot(rep(0, nrow(m.sigs)), 1:nrow(m.sigs), xlim = c(0, 1), axes = FALSE,
pch = NA, xlab = "", ylab = "", ylim = ylims)
text(x = 0.5, y = 1:nrow(m.sigs), labels = round(m.sigs$or, digits = 3),
adj = 0.5, font = 1, cex = 1.4*(geneFontSize))
title(main = "OR", cex.main = titleSize)
# MODIFIED CODE: Show actual p-values as decimal numbers instead of "NS"
m.sigs$significance = ifelse(test = as.numeric(m.sigs$pval) < 0.001, yes = "***", no =
ifelse(test = as.numeric(m.sigs$pval) < 0.01, yes = "**", no =
ifelse(test = as.numeric(m.sigs$pval) < 0.05, yes = "*", no =
format(round(as.numeric(m.sigs$pval), 3), nsmall=3))))
par(mar = c(3, 0, 3, 0))
plot(rep(0, nrow(m.sigs)), 1:nrow(m.sigs), xlim = c(0, 1), axes = FALSE,
pch = NA, xlab = "", ylab = "", ylim = ylims)
text(x = 0.5, y = 1:nrow(m.sigs), labels = m.sigs$significance,
adj = 0, font = 1, cex = 1.4*(geneFontSize))
title(main = "P-value", cex.main = titleSize)
par(mar = c(0, 0, 0, 0))
plot(NA, xlim = c(0,1), ylim = c(0, 1), axes = FALSE, xlab = NA, ylab = NA)
text(x = 0, labels = paste0(
"Odds ratio with 95% CI\n(1 = no effect, < 1 ",
m2Name,
" has more mutants)"
), y = 0.6, adj = 0, xpd = TRUE, cex = 1.2)
}# Run mafCompare analysis
res = mafCompare(
m1 = ALLBCLC_NoC,
m2 = BCLC_C,
m1Name = 'A+B',
m2Name = 'C',
minMut = 1,
useCNV = FALSE,
pathways = NULL,
custom_pw = NULL,
pseudoCount = TRUE
)
# Print the comparison results
#print(res)
# Use the modified forest plot function
forestPlot_noSamples(
mafCompareRes = res,
pVal = 0.5,
lineWidth = 2,
color = c('royalblue', 'maroon'),
geneFontSize = 1
)# Run mafCompare analysis
res = mafCompare(
m1 = ALLsample_Viralsamples,
m2 = ALLsample_NoViral,
m1Name = 'Viral',
m2Name = 'Non-Viral',
minMut = 1,
useCNV = FALSE,
pathways = NULL,
custom_pw = NULL,
pseudoCount = TRUE
)
# Print the comparison results
#print(res)
# Use the modified forest plot function
forestPlot_noSamples(
mafCompareRes = res,
pVal = 0.5,
lineWidth = 2,
color = c('royalblue', 'maroon'),
geneFontSize = 1
)